home *** CD-ROM | disk | FTP | other *** search
- /*
- * Bawk C actions compiler
- */
#include <stdio.h>
#include <string.h>
#include "bawk.h"
-
- act_compile(actbuf)
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = ACTION;
- return stmt_compile(actbuf);
- }
-
- pat_compile(actbuf)
- char *actbuf; /* where tokenized actions are compiled into */
- {
- Where = PATTERN;
- return stmt_compile(actbuf);
- }
-
- stmt_compile(actbuf)
- char *actbuf; /* where tokenized actions are compiled into */
- {
- /*
- * Read and tokenize C actions from current input file into the
- * action buffer. Strip out comments and whitespace in the process.
- */
- char *actptr, /* actbuf pointer */
- *cp, /* work pointer */
- buf[MAXLINELEN]; /* string buffer */
- int braces, /* counts '{}' pairs - return when 0 */
- parens, /* counts '()' pairs */
- i, /* temp */
- c; /* current input character */
-
- braces = parens = 0;
- actptr = actbuf;
- while ((c = getcharacter()) != -1) {
- /*
- * Skip over spaces, tabs and newlines
- */
- if (c == ' ' || c == '\t' || c == '\n')
- continue;
- if (c == '#') {
- /*
- * Skip comments. Comments start with a '#' and end
- * at the next newline.
- */
- while ((c = getcharacter()) != -1 && c != '\n');
- continue;
- }
-
- if (c == '{') {
- if (Where == PATTERN) {
- /*
- * We're compiling a pattern. The '{' marks
- * the beginning of an action statement. Push
- * the character back and return.
- */
- ungetcharacter('{');
- break;
- }
- else {
- /*
- * We must be compiling an action statement.
- * '{'s mark beginning of action or compound
- * statements.
- */
- ++braces;
- *actptr++ = T_LBRACE;
- }
- }
- else if (c == '}') {
- *actptr++ = T_RBRACE;
- if (!--braces)
- /*
- * Found the end of the action string
- */
- break;
- }
- else if (c == '(') {
- ++parens;
- *actptr++ = T_LPAREN;
- }
- else if (c == ')') {
- if (--parens < 0)
- error("mismatched '()'", ACT_ERROR);
- *actptr++ = T_RPAREN;
- }
- else if (c == ',' && !braces && !parens && Where == PATTERN) {
- /*
- * found a comma outside of any braces or parens-
- * this must be a regular expression seperator.
- */
- ungetcharacter(',');
- break;
- }
-
- /*
- * Check if it's a regular expression:
- */
- else if (c == '/') {
- /*
- * A '/' inside a pattern string starts a regular
- * expression. Inside action strings, a '/' is the
- * division operator.
- */
- if (Where == PATTERN)
- goto dopattern;
- else
- *actptr++ = T_DIV;
- }
- else if (c == '@') {
- dopattern:
- /*
- * Within action strings, only the '@' may be used to
- * delimit regular expressions
- */
- *actptr++ = T_REGEXP;
- ungetcharacter(c);
- actptr += re_compile(actptr);
- }
-
- /*
- * symbol, string or constant:
- */
- else if (alpha(c)) {
- /*
- * It's a symbol reference. Copy the symbol into
- * string buffer.
- */
- cp = buf;
- do
- *cp++ = c;
- while ((c = getcharacter()) != -1 && alphanum(c));
- ungetcharacter(c);
- *cp = 0;
- /*
- * Check if a keyword, builtin function or variable.
- */
- if (c = iskeyword(buf))
- *actptr++ = c;
- else if (i = isfunction(buf)) {
- *actptr++ = T_FUNCTION;
- storeint(actptr, i);
- actptr += sizeof(i);
- }
- else {
- /*
- * It's a symbol name.
- */
- *actptr++ = T_VARIABLE;
- if (!(cp = (char *) findvar(buf)))
- cp = (char *) addvar(buf);
- storeptr(actptr, cp);
- actptr += sizeof(cp);
- }
- }
-
- else if (c == '"') {
- /*
- * It's a string constant
- */
- *actptr++ = T_STRING;
- actptr = str_compile(actptr, '"');
- }
- else if (c == '\'') {
- /*
- * It's a character constant
- */
- *actptr++ = T_CONSTANT;
- str_compile(buf, '\'');
- storeint(actptr, *buf);
- actptr += sizeof(i);
- }
-
- else if (num(c)) {
- /*
- * It's a numeric constant
- */
- *actptr++ = T_CONSTANT;
- cp = buf;
- do
- *cp++ = c;
- while ((c = getcharacter()) != -1 && num(c));
- ungetcharacter(c);
- *cp = 0;
- storeint(actptr, atoi(buf));
- actptr += sizeof(i);
- }
-
- /*
- * unary operator:
- */
- else if (c == '$')
- *actptr++ = T_DOLLAR;
-
- /*
- * or binary operator:
- */
- else if (c == '=') {
- if ((c = getcharacter()) == '=')
- *actptr++ = T_EQ;
- else {
- ungetcharacter(c);
- *actptr++ = T_ASSIGN;
- }
- }
-
- else if (c == '!') {
- if ((c = getcharacter()) == '=')
- *actptr++ = T_NE;
- else {
- ungetcharacter(c);
- *actptr++ = T_LNOT;
- }
- }
-
- else if (c == '<') {
- if ((c = getcharacter()) == '<')
- *actptr++ = T_SHL;
- else if (c == '=')
- *actptr++ = T_LE;
- else {
- ungetcharacter(c);
- *actptr++ = T_LT;
- }
- }
-
- else if (c == '>') {
- if ((c = getcharacter()) == '>')
- *actptr++ = T_SHR;
- else if (c == '=')
- *actptr++ = T_GE;
- else {
- ungetcharacter(c);
- *actptr++ = T_GT;
- }
- }
-
- else if (c == '&') {
- if ((c = getcharacter()) == '&')
- *actptr++ = T_LAND;
- else {
- ungetcharacter(c);
- *actptr++ = T_AND;
- }
- }
-
- else if (c == '|') {
- if ((c = getcharacter()) == '|')
- *actptr++ = T_LIOR;
- else {
- ungetcharacter(c);
- *actptr++ = T_IOR;
- }
- }
- else if (c == '+') {
- if ((c = getcharacter()) == '+')
- *actptr++ = T_INCR;
- else {
- ungetcharacter(c);
- *actptr++ = T_ADD;
- }
- }
-
- else if (c == '-') {
- if ((c = getcharacter()) == '-')
- *actptr++ = T_DECR;
- else {
- ungetcharacter(c);
- *actptr++ = T_SUB;
- }
- }
-
- /*
- * punctuation
- */
- else if (instr(c, "[](),;*/%+-^~"))
- *actptr++ = c;
-
- else {
- /*
- * Bad character in input line
- */
- error("lexical error", ACT_ERROR);
- }
-
- if (actptr >= Workbuf + MAXWORKBUFLEN)
- error("action too long", MEM_ERROR);
- }
- if (braces || parens)
- error("mismatched '{}' or '()'", ACT_ERROR);
-
- *actptr++ = T_EOF;
-
- return actptr - actbuf;
- }
-
- char *
- str_compile(str, delim)
- char *str, delim;
- {
- /*
- * Compile a string from current input file into the given string
- * buffer. Stop when input character is the delimiter in "delim".
- * Returns a pointer to the first character after the string.
- */
- int c;
- char buf[MAXLINELEN];
-
- while ((c = getcharacter()) != -1 && c != delim) {
- if (c == '\\') {
- switch (c = getcharacter()) {
- case -1:
- goto err;
- case 'b':
- c = '\b';
- break;
- case 'n':
- c = '\n';
- break;
- case 't':
- c = '\t';
- break;
- case 'f':
- c = '\f';
- break;
- case 'r':
- c = '\r';
- break;
- case '0':
- case '1':
- case '2':
- case '3':
- *buf = c;
- for (c = 1; c < 3; ++c) {
- if ((buf[c] = getcharacter()) == -1)
- goto err;
- }
- buf[c] = 0;
- sscanf(buf, "%o", &c);
- break;
- case '\n':
- if (getcharacter() == -1)
- goto err;
- default:
- if ((c = getcharacter()) == -1)
- goto err;
- }
- }
- *str++ = c;
- }
- *str++ = 0;
-
- return (str);
- err:
- sprintf(buf, "missing %c delimiter", delim);
- error(buf, 4);
- }
-
/*
 * storeint: store the integer "i" at address "ip" and return "i".
 *
 * stmt_compile() calls this with a position in the action buffer that
 * directly follows a one-byte token, so "ip" is not necessarily aligned
 * for an int.  The value is therefore copied byte by byte instead of
 * being assigned through the pointer.
 */
int
storeint(ip, i)
int *ip, i;
{
	memcpy((char *) ip, (char *) &i, sizeof(i));
	return i;
}
-
/*
 * storeptr: store the pointer "p" at address "pp" and return "p".
 *
 * stmt_compile() calls this with a position in the action buffer that
 * directly follows a one-byte token, so "pp" is not necessarily aligned
 * for a pointer.  The value is therefore copied byte by byte instead of
 * being assigned through the pointer.
 */
char *
storeptr(pp, p)
char **pp, *p;
{
	memcpy((char *) pp, (char *) &p, sizeof(p));
	return (p);
}
-
/*
 * fetchint: return the integer stored at address "ip".
 *
 * getoken() calls this with a position in the action buffer that directly
 * follows a one-byte token, so "ip" is not necessarily aligned for an int.
 * The value is therefore copied out byte by byte instead of being read
 * through the pointer.
 */
int
fetchint(ip)
int *ip;
{
	int value;

	memcpy((char *) &value, (char *) ip, sizeof(value));
	return value;
}
-
/*
 * fetchptr: return the pointer stored at address "pp".
 *
 * getoken() calls this with a position in the action buffer that directly
 * follows a one-byte token, so "pp" is not necessarily aligned for a
 * pointer.  The value is therefore copied out byte by byte instead of
 * being read through the pointer.
 */
char *
fetchptr(pp)
char **pp;
{
	char *value;

	memcpy((char *) &value, (char *) pp, sizeof(value));
	return value;
}
-
- getoken()
- {
- char *cp;
- int i;
-
- switch (Token = *Actptr++) {
- case T_STRING:
- case T_REGEXP:
- Value.dptr = Actptr;
- Actptr += strlen(Actptr) + 1;
- break;
- case T_VARIABLE:
- Value.dptr = fetchptr(Actptr);
- Actptr += sizeof(cp);
- break;
- case T_FUNCTION:
- case T_CONSTANT:
- Value.ival = fetchint(Actptr);
- Actptr += sizeof(i);
- break;
- case T_EOF:
- --Actptr;
- default:
- Value.dptr = 0;
- }
-
- #ifdef DEBUG
- if (Debug > 1)
- printf("Token='%c' (0x%x), Value=%d\n",
- Token, Token, Value.ival);
- #endif
-
- return Token;
- }
-